Loading all data

# study area
# studyarea <- st_read("data/StudyArea.shp") %>% 
studyarea <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp") %>%
  st_transform('EPSG:2272')
## Reading layer `StudyArea' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -8367662 ymin: 4859107 xmax: -8365440 ymax: 4860628
## Projected CRS: WGS 84 / Pseudo-Mercator
# Philly block groups
# blockgroups <- st_read("data/Philly_blockgroup.shp") %>%
blockgroups <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp") %>%
  st_transform('EPSG:2272')
## Reading layer `Philly_blockgroup' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1338 features and 18 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -8380161 ymin: 4846701 xmax: -8344037 ymax: 4886015
## Projected CRS: WGS 84 / Pseudo-Mercator
# philly bounds
philly_bounds <- st_union(blockgroups)

# philly hydrology (bounded by philly_bounds; source: https://opendataphilly.org/datasets/hydrology/)
hydro <- st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>% 
    st_transform(crs = 'EPSG:2272') %>% 
  st_intersection(philly_bounds)
## Reading layer `OGRGeoJSON' from data source 
##   `https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 7970 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -76.18462 ymin: 38.7667 xmax: -74.71871 ymax: 40.66593
## Geodetic CRS:  WGS 84
# highway
# stateroads_inphilly <- st_read("data/PaStateRoads2024_03.geojson") %>% 
#   st_transform('EPSG:2272') %>% 
#   st_intersection(philly_bounds)

# write out stateroads_inphilly as its own file since it's big
# st_write(stateroads_inphilly,
#          "data/PhillyStateRoads.shp")

# read in pre-filtered stateroads data
stateroads_inphilly <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp")
## Reading layer `PhillyStateRoads' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2315 features and 105 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 2660586 ymin: 207551.4 xmax: 2749290 ymax: 304306.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
philly_mainhighways <- stateroads_inphilly %>%
  filter(TRAF_RT_NO %in% c("I", "US"),
         ST_RT_NO %in% c("0001", "0095", "0076", "0676")) %>%
  dplyr::select(STREET_NAM, ST_RT_NO, TRAF_RT_NO, TRAF_RT__1)

# save and only use the highways of interest
# for now, interested in comparing highways that cut through neighborhoods vs those that don't

# ACS 


# philly property data (https://opendataphilly.org/datasets/philadelphia-properties-and-assessment-history/) <-- that was the original source, but now i'm using the same downloaded file that Luming and Sijia have been working from
# philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson") %>% 
#   st_transform(crs = 'EPSG:2272')

# new property CSV
# this is a CSV and doesn't have the right geometry col
# philly_properties_new <- read_csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250307.csv") %>% 
#   rename(old_geom = geometry) %>% 
#   left_join(philly_properties %>% 
#               select(objectid, geometry), # joining with original properties data to get geometries
#             by = "objectid") %>% 
#   st_as_sf(crs = 'EPSG:2272')

# write out file as geojson
# st_write(philly_properties_new,
#          "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250307.geojson")

# new property data (from 3/7/25 with inflation adjusted property prices)
philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250318.geojson")
## Reading layer `property_philly' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250318.geojson' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 90508 features and 75 fields (with 13 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2662223 ymin: 211097.5 xmax: 2749274 ymax: 304860.8
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# philly park data (https://opendataphilly.org/datasets/ppr-properties/)
philly_parks <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
  st_transform(crs = 'EPSG:2272') %>% 
  st_intersection(philly_bounds)
## Reading layer `PPR_Properties' from data source 
##   `https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 507 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -75.28353 ymin: 39.87117 xmax: -74.95865 ymax: 40.13186
## Geodetic CRS:  WGS 84
# philly schools
school <-
  st_read("https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson") %>%
  st_transform('EPSG:2272')
## Reading layer `Schools' from data source 
##   `https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 495 features and 15 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.2665 ymin: 39.90781 xmax: -74.97057 ymax: 40.12974
## Geodetic CRS:  WGS 84
# city facilities (https://opendataphilly.org/datasets/city-facilities-master-facilities-database/)
facilities <- st_read("https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson") %>%
  st_transform('EPSG:2272') %>% 
  filter(STATUS == "A") # remove inactive sites
## Reading layer `City_Facilities_pub' from data source 
##   `https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 3197 features and 32 fields (with 5 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.28169 ymin: 39.86686 xmax: -74.96455 ymax: 40.13108
## Geodetic CRS:  WGS 84
# libraries (from facilities data)
libraries <- facilities %>% 
  filter(ASSET_GROUP1 == "A8")

# prisons (from facilities data)
prisons <- facilities %>% 
  filter(ASSET_GROUP2 == "A13.3")

# historic site (from facilities data)
historic <- facilities %>% 
  filter(ASSET_SUBTYPE1 == "C21")

# museums (from facilities data)
museums <- facilities %>% 
  filter(ASSET_GROUP1 == "A18",
         ASSET_SUBTYPE1 == "C35")

# produce (LPSS, HPSS -- indicators for produce; low or high produce supply stores)
retail_produce <- st_read("https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson") %>%
  st_transform('EPSG:2272')
## Reading layer `370eb703-5a4f-4abb-8920-727cef31373b2020329-1-rcimn.5n39o' from data source `https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 1336 features and 16 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
## Geodetic CRS:  WGS 84
# transit stops
metro <- st_read("https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326") %>%
  st_transform('EPSG:2272') %>%
  mutate(Type = "metro")
## Reading layer `Highspeed_Stations' from data source 
##   `https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326' 
##   using driver `GeoJSON'
## Simple feature collection with 74 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.35358 ymin: 39.90543 xmax: -75.07788 ymax: 40.11349
## Geodetic CRS:  WGS 84
city_hall <- metro[metro$Station == 'City Hall', 6]

trolley <- st_read("https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326") %>%
  st_transform('EPSG:2272')
## Reading layer `Trolley_Stations' from data source 
##   `https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326' 
##   using driver `GeoJSON'
## Simple feature collection with 60 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.394 ymin: 39.90667 xmax: -75.16256 ymax: 39.96216
## Geodetic CRS:  WGS 84
# why are some trolley stations missing? going southwest from 40th st station + going northwest from 33rd/34th station

trolley_renamed <- trolley %>%
    rename(Station = StopName,
           Route = LineAbbr,
           Longitude = Lon,
           Latitude = Lat) %>%
    mutate(Type = "trolley")

# Combine both datasets into one
transit <- bind_rows(metro, trolley_renamed)

# PA hospitals (within 2.5 miles of philly bounds)
hospitals <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/DOH_Hospitals202311.geojson") %>% 
  st_transform('EPSG:2272') %>% 
  st_intersection(st_buffer(philly_bounds, 13200))
## Reading layer `DOH_Hospitals202311' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/DOH_Hospitals202311.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 226 features and 14 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -80.49621 ymin: 39.75163 xmax: -74.86704 ymax: 42.13403
## Geodetic CRS:  WGS 84
# farmers markets
farmersmarkets <- st_read("https://opendata.arcgis.com/datasets/0707c1f31e2446e881d680b0a5ee54bc_0.geojson") %>% 
  st_transform('EPSG:2272')
## Reading layer `Farmers_Markets' from data source 
##   `https://opendata.arcgis.com/datasets/0707c1f31e2446e881d680b0a5ee54bc_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 22 features and 53 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.22069 ymin: 39.92713 xmax: -75.05142 ymax: 40.06754
## Geodetic CRS:  WGS 84
# checking out what Luming_property.csv looks like
luming_property <- read.csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Luming_property.csv")
# looking at which properties were saved in luming_property -- Luming and Sijia's properties files are limited to the case study area
# luming_properties <- philly_properties %>% 
#   filter(objectid %in% luming_property$objectid)

# checking out what property_sijia_eda.geojson looks like
sijia_eda <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson")
## Reading layer `property_sijia_eda' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 2394 features and 98 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2692333 ymin: 236562 xmax: 2697665 ymax: 240121.5
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# google API to get restaurant data
# incomplete, will figure this out later
# restaurant_results <- google_places(search_string = paste(search_terms, city, sep = " "), 
#                                     key = google_api)

Initial plots

# interstate highways in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(TRAF_RT_NO == "I"))
# 
# # US highways in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(TRAF_RT_NO == "US"))
# 
# # PA state highways in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(TRAF_RT_NO == "PA"))
# 
# # all other roads in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(!(TRAF_RT_NO %in% c("I", "US", "PA"))))

# I-95, US-1, I-676, and I-76
philly_highways_plot <- 
  ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6"),
                     labels = c("US-1", "I-76", "I-95", "I-676")) +
  labs(title = "Case Study of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways") +
  theme_void()
philly_highways_plot

# ggsave("Output/HighwaysMap.png", plot = philly_highways_plot)

# looking at study area within philly bounds
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = studyarea, fill = "coral")

# philly parks
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent")

# philly schools
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = school, color = "brown4") # point data

# libraries
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = libraries, color = "#eb5600") # point data

# transit / looking at what this category actually consists of....
# looks like there's point geography for parking lots, some random transit stops, and heli pad station
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = facilities %>% 
#             filter(ASSET_GROUP1 == "A17",
#                    grepl("Parking", ASSET_SUBT1_DESC)), color = "pink") # point data

# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = sijia_eda, color = "#7fe3d6")

# prison facilities
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = facilities %>% 
#             filter(ASSET_GROUP2 == "A13.3"), color = "lightgrey") # point data

# all philly properties
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_properties, color = "black")

Case Study of Philadelphia’s Highways and Housing Price

For all Philly properties, calculate the distance from highway and see how it relates to price.

# taking distance calculator function (from functions.R in PPA textbook)
calculate_nearest_distance <- function(set_points, other_layer) {
  nearest_idx <- st_nearest_feature(set_points, other_layer)
  st_distance(set_points, other_layer[nearest_idx, ], by_element = TRUE) %>% as.numeric()
}

The following code chunk took too much to load (too many properties to consider), so I’m going to limit it to the properties within 500m (and possibly look at 1000m and 1500m) from highways to look at the immediate effects of highways on property prices.

# only keep properties that are within 500m (1640.42 ft) of highways of interest
buff500 <- 500 * 3.28084
highways_500buffer <- st_union(st_buffer(philly_mainhighways, buff500))

properties500 <- philly_properties[highways_500buffer,]

# only keep properties that are within 1000m (3280.84 ft) of highways of interest
buff1000 <- 1000 * 3.28084
highways_1000buffer <- st_union(st_buffer(philly_mainhighways, buff1000))

properties1000 <- philly_properties[highways_1000buffer,]

Properties within 500m of highways

# visualizing buffer
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6")) +
  
  labs(title = "Case Study of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

# visualizing properties to make sure we got the right ones
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500,
          color = "#4a4a4a", size = .3) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6")) +
  
  labs(title = "Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_1000buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties1000,
          color = "#4a4a4a", size = .3) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6"),
                       labels = c("US-1", "I-76", "I-95", "I-676")) +
  
  labs(title = "Properties within 1000m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

# checking how infaltion-adjusted sale prices look compared to raw sale price
philly_properties %>% 
  select(sale_date, sale_price, adjusted_sale_price) %>% 
  st_drop_geometry() %>% 
  pivot_longer(cols = 2:3,
               names_to = "adj",
               values_to = "price") %>% 
  mutate(adj = factor(adj, levels = c("sale_price", "adjusted_sale_price"))) %>% 
  ggplot(aes(x = sale_date, y = price)) +
  geom_point() +
  facet_wrap(~adj) +
  labs(title = "Sale Price over Time") +
  theme_minimal()

# adjusted_sale_price vs sale_price, colored by sale_date
ggplot(philly_properties,
       aes(x = sale_price, y = adjusted_sale_price, color = sale_date)) +
    geom_point() +
    geom_smooth(method = "lm") +
  theme_minimal()

It looks like the inflation adjusted sales prices are a bit higher than the sales prices of 2024.

Are there differences between how price of property relates to dist_to_closest_highway depending on type of highway (whether highway cuts through neighborhood or not)?

# starting with 500m buffer
# need to check if there are 0 values in sale_price before log transforming
summary(properties500$sale_price) # 16,430 rows
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    30345   166000   249900   461129   395000 15500000
#     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#    30345   166000   249900   461129   395000 15500000 

summary(properties500$adjusted_sale_price)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    31569   190426   285105   529182   448071 19426645
#    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#   31569   190426   285105   529182   448071 19426645 

# using property data that has already been filtered
properties500_clean <- properties500 %>% 
  filter(total_area > 1) %>% # still keeping this filter for now
  mutate(price_perTLA = adjusted_sale_price / total_livable_area,
         price_perTA = adjusted_sale_price / total_area,
         log_price = log(adjusted_sale_price),
         log_price_perTLA = log(price_perTLA),
         log_price_perTA = log(price_perTA),
         category_easy = case_when(category_code_description %in% c("APARTMENTS  > 4 UNITS",
                                                                    "MIXED USE",
                                                                    "MULTI FAMILY",
                                                                    "SINGLE FAMILY") ~ "Residential or Mixed",
                                   grepl("INDUS", category_code_description) ~ "Industrial",
                                   grepl("HOTEL", category_code_description) ~ "Hotel",
                                   grepl("OFFICES", category_code_description) ~ "Offices",
                                   category_code_description %in% c("COMMERCIAL",
                                                                    "GARAGE - COMMERCIAL",
                                                                    "RETAIL") ~ "Commercial",
                                   grepl("VACANT", category_code_description) ~ "Vacant",
                                   .default = "Other"))

summary(properties500_clean$adjusted_sale_price) # 13,345 rows
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    31569   190426   285105   529182   448071 19426645
#    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#   31569   190426   285105   529182   448071 19426645 
summary(properties500_clean$price_perTLA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   8.232 149.884 220.229     Inf 310.111     Inf     404
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#  8.232 149.884 220.229     Inf 310.111     Inf     404 
summary(properties500_clean$price_perTA)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     1.179   109.486   175.165   386.567   405.577 11539.259
#   Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#  1.179   109.486   175.165   386.567   405.577 11539.259

# correlations between total_area and total_livable_area, and sale_price
# cor(properties500_clean$total_area, properties500_clean$total_livable_area, use = "pairwise.complete.obs") # 0.5581733
# cor(properties500_clean$total_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.4357107
# cor(properties500_clean$total_livable_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.5772053

# histogram of sale_price
ggplot(data = properties500_clean) +
  geom_histogram(aes(x = adjusted_sale_price),
                 bins = 100) +
  theme_minimal() +
  labs(title = "Distribution of Adjusted Sale Price")

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = price_perTA),
                 bins = 100) +
  theme_minimal() +
  labs(title = "Distribution of Sale Price per total area")

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = price_perTLA),
                 bins = 100) +
  theme_minimal() +
  labs(title = "Distribution of Sale Price per total livable area")

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = log_price),
                 bins = 100) +
  theme_minimal() +
  labs(title = "Distribution of Adjusted Sale Price (log-transformed)")

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = log_price_perTA),
                 bins = 100) +
  theme_minimal() +
  labs(title = "Distribution of Sale Price per total area (log-transformed)")

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = log_price_perTLA),
                 bins = 100) +
  theme_minimal() +
  labs(title = "Distribution of Sale Price per total livable area (log-transformed)")

# map of properties within 500 m of highway, colored by sale_price
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
#   geom_sf(data = highways_500buffer,
#           color = "transparent", fill = "grey", alpha = 0.4) +
#   geom_sf(data = properties500_clean,
#           aes(color = sale_price), size = .3) +
#   geom_sf(data = philly_mainhighways,
#           color = "black") +
#   labs(title = "Sale Price of Properties within 500m of Highways in Philly",
#        subtitle = "US-1, I-76, I-95, and I-676") +
#   theme_void()

# map of properties within 500 m of highway, colored by price_perTA
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
#   geom_sf(data = highways_500buffer,
#           color = "transparent", fill = "grey", alpha = 0.4) +
#   geom_sf(data = properties500_clean,
#           aes(color = price_perTA), size = .3) +
#   geom_sf(data = philly_mainhighways,
#           color = "black") +
#   labs(title = "Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
#        subtitle = "US-1, I-76, I-95, and I-676") +
#   theme_void()

# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
#   geom_sf(data = highways_500buffer,
#           color = "transparent", fill = "grey", alpha = 0.4) +
#   geom_sf(data = properties500_clean,
#           aes(color = price_perTLA), size = .3) +
#   geom_sf(data = philly_mainhighways,
#           color = "black") +
#   labs(title = "Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
#        subtitle = "US-1, I-76, I-95, and I-676") +
#   theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "black", alpha = 0.2) +
  geom_sf(data = properties500_clean,
          aes(color = log_price), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Log Price") +
  theme_void()

# house sale price (category_easy == "Residential or Mixed")
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "black", alpha = 0.2) +
  geom_sf(data = properties500_clean %>% 
            filter(grepl("Resident", category_easy)),
          aes(color = log_price), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
       subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
       color = "Log Price") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean,
          aes(color = log_price_perTA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Log Price (per TA)") +
  theme_void()

# same thing but residential or mixed use only
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean %>% 
            filter(grepl("Resident", category_easy)),
          aes(color = log_price_perTA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
       subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
       color = "Log Price (per TA)") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean,
          aes(color = log_price_perTLA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Log Price (per TLA)") +
  theme_void()

# same thing but residential/mixed use only
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean %>% 
            filter(grepl("Resident", category_easy)),
          aes(color = log_price_perTLA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
       subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
       color = "Log Price (per TLA)") +
  theme_void()

Calculate each property’s distance to highways.

# calculate each property's distance to highways of interest
# distance is in feet
# start_time <- Sys.time()
# prop500 <- properties500_clean %>%
#   select(matches("assessment_date|building_code|category_|census_tract|geographic|zip|market_value|parcel|sale_|taxable|_area|year_built|objectid|geometry|price")) %>%
#   mutate(dist_to_US001 = calculate_nearest_distance(geometry,
#                                      philly_mainhighways %>%
#                                        filter(ST_RT_NO == "0001")),
#          dist_to_I076 = calculate_nearest_distance(geometry,
#                                     philly_mainhighways %>%
#                                       filter(ST_RT_NO == "0076")),
#          dist_to_I095 = calculate_nearest_distance(geometry,
#                                     philly_mainhighways %>%
#                                       filter(ST_RT_NO == "0095")),
#          dist_to_I676 = calculate_nearest_distance(geometry,
#                                     philly_mainhighways %>%
#                                       filter(ST_RT_NO == "0676")),
#          dist_to_US001m = dist_to_US001 * 0.3048,
#          dist_to_I076m = dist_to_I076 * 0.3048,
#          dist_to_I095m = dist_to_I095 * 0.3048,
#          dist_to_I676m = dist_to_I676 * 0.3048) # Time difference of 55.20697 secs

# find closest highway
# prop500_closest <- prop500 %>%
#   select(matches("objectid|dist_to")) %>%
#   pivot_longer(cols = 2:5,
#                names_to = "highway",
#                values_to = "distance") %>%
#   group_by(objectid) %>%
#   summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
#   ungroup()  %>%
#   mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048)

# join closest highway
# prop500 <- left_join(prop500,
#                      prop500_closest %>% st_drop_geometry(),
#                      by = "objectid") %>%
#   mutate(closest_highway = case_when(
#     dist_to_closest_highway == dist_to_US001 ~ "US-1",
#     dist_to_closest_highway == dist_to_I076 ~ "I-76",
#     dist_to_closest_highway == dist_to_I095 ~ "I-95",
#     dist_to_closest_highway == dist_to_I676 ~ "I-676",
#     .default = NA),
#     closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
#     highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
#                              closest_highway %in% c("I-95","I-76") ~ "along waterway",
#                              .default = NA))

# save file
# st_write(prop500, "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dst2hghwy_250318.geojson")

# read in pre-saved file
prop500 <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dst2hghwy_250318.geojson") %>% 
  mutate(closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")))
## Reading layer `philly_prop500_dst2hghwy_250318' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dst2hghwy_250318.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 13343 features and 41 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2662223 ymin: 212357.3 xmax: 2742918 ymax: 299389.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)

First, let’s looks at a faceted plot of the distribution of sales price for each highway separately

# prop500_faceted <- 
#   ggplot(data = prop500,
#          aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
#   geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
#   geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
#   # geom_point(alpha = .1, size = 0.3) +
#   # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
#   facet_wrap(~closest_highway) +
#   labs(title = "Distribution of Property Sale Price",
#        x = "Distance to Closest Highway (m)",
#        y = "Log Sale Price (per TA)") +
#   theme_minimal()

prop500_faceted <- 
  ggplot(data = prop500,
         aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
  geom_histogram(bins = 50) +
  facet_wrap(~closest_highway, scales = "free_y") +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Distribution of Property Sale Price",
       x = "Log Sale Price (per Total Area)",
       y = "Count",
       color = "Closest Highway",
       fill = "Closest Highway") +
  theme_minimal()

prop500_faceted

# save image for slides
# ggsave("Output/SalePriceDistribution.png", plot = prop500_faceted)

# residential + mixed only 
# prop500_faceted_res <- 
#   ggplot(data = prop500 %>% 
#            filter(grepl("Reside", category_easy)),
#          aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
#   geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
#   geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
#   # geom_point(alpha = .1, size = 0.3) +
#   # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
#   # facet_wrap(closest_highway) +
#   labs(title = "Distribution of Property Sale Price",
#        x = "Distance to Closest Highway (m)",
#        y = "Log Sale Price (per TA)") +
#   theme_minimal()

prop500_faceted_res <- 
  ggplot(data = prop500 %>% 
           filter(grepl("Resid", category_easy)),
         aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
  geom_histogram(bins = 50) +
  facet_wrap(~closest_highway, scales = "free_y") +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Distribution of Property Sale Price",
       subtitle = "Residential and Mixed Properties only",
       x = "Log Sale Price (per Total Area)",
       y = "Count",
       color = "Closest Highway",
       fill = "Closest Highway") +
  theme_minimal()

prop500_faceted_res

# save image for slides
# ggsave("Output/SalePriceDistribution_residential.png", plot = prop500_faceted_res)

Now, look at relationship between distance to highway and price.

# map of properties within 500m of highway cored by which highway they're closest to
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = prop500,
          aes(color = closest_highway), size = 0.3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  
  # add custom colors
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  
  labs(title = "Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Closest Highway") +
  theme_void()

# simple scatter of price and dist2closesthighway
ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = adjusted_sale_price, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2) +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price as a function of Distance to Closest Highway",
       subtitle = "Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Distance to Closest Highway (m)",
       y = "Property Sale Price ($)") +
  theme_minimal()

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
cor_us1 <- round(cor(prop500 %>% 
                 filter(grepl("US", closest_highway)) %>% 
                 pull(log_price_perTA), 
               prop500 %>% 
                 filter(grepl("US", closest_highway)) %>% 
                 pull(dist_to_closest_highwaym)), 3)
cor_i76 <- round(cor(prop500 %>% 
                 filter(grepl("I-76", closest_highway)) %>% 
                 pull(log_price_perTA), 
               prop500 %>% 
                 filter(grepl("I-76", closest_highway)) %>% 
                 pull(dist_to_closest_highwaym)), 3)
cor_i95 <- round(cor(prop500 %>% 
                 filter(grepl("I-95", closest_highway)) %>% 
                 pull(log_price_perTA), 
               prop500 %>% 
                 filter(grepl("I-95", closest_highway)) %>% 
                 pull(dist_to_closest_highwaym)), 3)
cor_i676 <- round(cor(prop500 %>% 
                  filter(grepl("I-676", closest_highway)) %>% 
                  pull(log_price_perTA), 
                prop500 %>% 
                  filter(grepl("I-676", closest_highway)) %>% 
                  pull(dist_to_closest_highwaym)), 3)

scatter_4highways <- 
  ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  geom_text(aes(label = cor_us1,
                x = 150,
                y = 1.25),
            color = "#1a9988",
            show.legend = F) +
  geom_text(aes(label = cor_i76,
                x = 350,
                y = 1.25),
            color = "#eb5600",
            show.legend = F) +
  geom_text(aes(label = cor_i95,
                x = 350,
                y = 8.75),
            color = "#eba075",
            show.legend = F) +
  geom_text(aes(label = cor_i676,
                x = 150,
                y = 8.75),
            color = "#7fe3d6",
            show.legend = F) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
       subtitle = "Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Distance to Closest Highway (m)",
       y = "Property Sale Price (log price per TA)") +
  theme_minimal()

scatter_4highways

# save image for slides
# ggsave("Output/PriceVSDist2Highway_4highways.png",
#        plot = scatter_4highways,
#        height = 7,
#        width = 10)

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway 
# sale_price <= 1e7
ggplot(data = prop500 %>% 
         filter(sale_price <= 1e7), 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
       subtitle = "Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Distance to Closest Highway (m)",
       y = "Property Sale Price (log price per TA)") +
  theme_minimal()

# simple scatter of price (as log price per total area in sq ft) and log(dist2closesthighway)
ggplot(data = prop500, 
       aes(x = log(dist_to_closest_highwaym), y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway (log)",
       subtitle = "Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Log Distance to Closest Highway (m)",
       y = "Log Property Sale Price (per TA)") +
  theme_minimal()

# simple regression of price (log_price_perTA) and dist2closesthighway
fit1 <- lm(log_price_perTA ~ dist_to_closest_highwaym, data = prop500)

summary(fit1)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym, data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1967 -0.6524 -0.1810  0.6549  4.0119 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               5.433e+00  2.044e-02 265.807  < 2e-16 ***
## dist_to_closest_highwaym -3.010e-04  6.545e-05  -4.599 4.28e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.006 on 13341 degrees of freedom
## Multiple R-squared:  0.001583,   Adjusted R-squared:  0.001508 
## F-statistic: 21.15 on 1 and 13341 DF,  p-value: 4.282e-06
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types)
fit2 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway, data = prop500)

summary(fit2)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway, 
##     data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6275 -0.4675  0.0543  0.4564  4.3238 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               4.897e+00  1.937e-02 252.834  < 2e-16 ***
## dist_to_closest_highwaym -3.731e-04  5.678e-05  -6.571 5.19e-11 ***
## closest_highwayI-76       1.051e+00  2.884e-02  36.454  < 2e-16 ***
## closest_highwayI-676      1.743e+00  6.418e-02  27.164  < 2e-16 ***
## closest_highwayI-95       9.845e-01  1.575e-02  62.496  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8657 on 13338 degrees of freedom
## Multiple R-squared:  0.2606, Adjusted R-squared:  0.2604 
## F-statistic:  1175 on 4 and 13338 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types) with interaction
fit3 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway + dist_to_closest_highwaym*closest_highway, data = prop500)

summary(fit3)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway + 
##     dist_to_closest_highwaym * closest_highway, data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7897 -0.4624  0.0515  0.4524  4.3061 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    4.764e+00  2.581e-02 184.604
## dist_to_closest_highwaym                       1.050e-04  8.353e-05   1.257
## closest_highwayI-76                            7.358e-01  8.645e-02   8.512
## closest_highwayI-676                           1.599e+00  1.313e-01  12.171
## closest_highwayI-95                            1.303e+00  3.625e-02  35.940
## dist_to_closest_highwaym:closest_highwayI-76   8.623e-04  2.481e-04   3.476
## dist_to_closest_highwaym:closest_highwayI-676  7.507e-04  5.035e-04   1.491
## dist_to_closest_highwaym:closest_highwayI-95  -1.142e-03  1.173e-04  -9.742
##                                               Pr(>|t|)    
## (Intercept)                                    < 2e-16 ***
## dist_to_closest_highwaym                      0.208778    
## closest_highwayI-76                            < 2e-16 ***
## closest_highwayI-676                           < 2e-16 ***
## closest_highwayI-95                            < 2e-16 ***
## dist_to_closest_highwaym:closest_highwayI-76  0.000511 ***
## dist_to_closest_highwaym:closest_highwayI-676 0.135991    
## dist_to_closest_highwaym:closest_highwayI-95   < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8614 on 13335 degrees of freedom
## Multiple R-squared:  0.2681, Adjusted R-squared:  0.2678 
## F-statistic: 697.9 on 7 and 13335 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types)
fit4 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type, data = prop500)

summary(fit4)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + highway_type, 
##     data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6517 -0.4890  0.0290  0.4478  4.2732 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       5.908e+00  1.968e-02 300.203  < 2e-16 ***
## dist_to_closest_highwaym         -4.293e-04  5.791e-05  -7.413 1.31e-13 ***
## highway_typethrough neighborhood -9.423e-01  1.545e-02 -61.008  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8894 on 13340 degrees of freedom
## Multiple R-squared:  0.2194, Adjusted R-squared:  0.2193 
## F-statistic:  1874 on 2 and 13340 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit5 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type + dist_to_closest_highwaym*highway_type, data = prop500)

sum5 <- summary(fit5)

data.frame(sum5$coefficients) %>% 
  mutate(Estimate = round(Estimate, 3),
         StdErorr = round(Std..Error, 3),
         PValue = round(Pr...t.., 3)) %>% 
  select(Estimate, StdErorr, PValue) %>% 
  kbl(align = c("l", rep("c", 3)),
      caption = "Linear Regression: Sale Price ~ Distance to Closest Highway + Highway Type + Interaction") %>% 
    kable_styling() %>% 
    kable_classic(full_width = F, html_font = "Cambria") 
Linear Regression: Sale Price ~ Distance to Closest Highway + Highway Type + Interaction
Estimate StdErorr PValue
(Intercept) 6.008 0.025 0
dist_to_closest_highwaym -0.001 0.000 0
highway_typethrough neighborhood -1.154 0.036 0
dist_to_closest_highwaym:highway_typethrough neighborhood 0.001 0.000 0
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit6 <- lm(log_price_perTA ~ log(dist_to_closest_highwaym) + highway_type + log(dist_to_closest_highwaym)*highway_type, data = prop500)

summary(fit6)
## 
## Call:
## lm(formula = log_price_perTA ~ log(dist_to_closest_highwaym) + 
##     highway_type + log(dist_to_closest_highwaym) * highway_type, 
##     data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8095 -0.4831  0.0275  0.4438  4.2556 
## 
## Coefficients:
##                                                                Estimate
## (Intercept)                                                     6.53100
## log(dist_to_closest_highwaym)                                  -0.13573
## highway_typethrough neighborhood                               -1.70492
## log(dist_to_closest_highwaym):highway_typethrough neighborhood  0.13948
##                                                                Std. Error
## (Intercept)                                                       0.09023
## log(dist_to_closest_highwaym)                                     0.01630
## highway_typethrough neighborhood                                  0.13197
## log(dist_to_closest_highwaym):highway_typethrough neighborhood    0.02393
##                                                                t value Pr(>|t|)
## (Intercept)                                                     72.385  < 2e-16
## log(dist_to_closest_highwaym)                                   -8.328  < 2e-16
## highway_typethrough neighborhood                               -12.919  < 2e-16
## log(dist_to_closest_highwaym):highway_typethrough neighborhood   5.830 5.69e-09
##                                                                   
## (Intercept)                                                    ***
## log(dist_to_closest_highwaym)                                  ***
## highway_typethrough neighborhood                               ***
## log(dist_to_closest_highwaym):highway_typethrough neighborhood ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.889 on 13339 degrees of freedom
## Multiple R-squared:  0.2202, Adjusted R-squared:   0.22 
## F-statistic:  1256 on 3 and 13339 DF,  p-value: < 2.2e-16

Visualizing the highway types (cutting through neighborhoods vs running alongside water).

# simple scatter of price and dist2closesthighway
ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = adjusted_sale_price, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price as a function of Distance to Highway Type",
       subtitle = "Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type") +
  theme_minimal()

# simple scatter of price and dist2closesthighway with loess method
ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = sale_price, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "loess", se = FALSE, size = 1.2) +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price as a function of Distance to Highway Type",
       subtitle = "Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type") +
  theme_minimal()

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
cor_thru <- round(cor(prop500 %>%
                        filter(grepl("through", highway_type)) %>% 
                        pull(log_price_perTA), 
                      prop500 %>% 
                        filter(grepl("through", highway_type)) %>% 
                        pull(dist_to_closest_highwaym)), 3)
cor_water <- round(cor(prop500 %>% 
                         filter(grepl("along", highway_type)) %>% 
                         pull(log_price_perTA), 
                       prop500 %>% 
                         filter(grepl("along", highway_type)) %>% 
                         pull(dist_to_closest_highwaym)), 3)

scatter_2types <- 
  ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  geom_text(aes(label = cor_thru,
                x = 250,
                y = 1),
            color = "#1a9988",
            show.legend = F) +
  geom_text(aes(label = cor_water,
                x = 250,
                y = 9),
            color = "#eb5600",
            show.legend = F) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
       subtitle = "Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type",
       x = "Distance to Closest Highway (m)",
       y = "Log Sale Price (per TA)") +
  theme_minimal()
scatter_2types

# save for slides
# ggsave("Output/PriceVSDist2Highway_2types.png", 
#        plot = scatter_2types,
#        height = 7,
#        width = 10)

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500, 
       aes(x = log(dist_to_closest_highwaym), y = log_price_perTA, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
       subtitle = "Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type",
       x = "Distance to Closest Highway (m)",
       y = "Log Sale Price (per TA)") +
  theme_minimal()

# correlations
prop500 %>%
  st_drop_geometry() %>%
  group_by(highway_type) %>%
  summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>% 
  mutate(correlation = round(correlation, 3)) %>% 
  kbl(col.names = c("Highway Type", "Correlation"),
        align = c("l", "c"),
      caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway Types") %>% 
    kable_styling() %>% 
    kable_classic(full_width = F, html_font = "Cambria") 
Correlation Between Property Sale Price and Distance to Closest Highway by Highway Types
Highway Type Correlation
along waterway -0.100
through neighborhood -0.006
# correlation of price and dist to highway for each highway
prop500 %>%
  st_drop_geometry() %>%
  group_by(closest_highway) %>%
  summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>% 
  mutate(correlation = round(correlation, 3)) %>% 
  kbl(col.names = c("Highway", "Correlation"),
        align = c("l", "c"),
      caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway") %>%
  kable_styling() %>% 
  kable_classic(html_font = "Cambria") 
Correlation Between Property Sale Price and Distance to Closest Highway by Highway
Highway Correlation
US-1 0.023
I-76 0.107
I-676 0.147
I-95 -0.134
prop500 %>%
  st_drop_geometry() %>%
  group_by(closest_highway) %>%
  summarize(correlation = cor(log(dist_to_closest_highwaym), log_price_perTA, method = "pearson"), .groups = "drop")
## # A tibble: 4 × 2
##   closest_highway correlation
##   <fct>                 <dbl>
## 1 US-1                 0.0276
## 2 I-76                 0.141 
## 3 I-676                0.203 
## 4 I-95                -0.117

Multiple Linear Regression: Sale Price by Distance to Highway (controlling for other variables)

We’re looking at all properties in Philadelphia (sale date within the last 5 years, aka between Jan. 1st, 2020 - Dec.31st, 2024; sale price is < $30,000 and > 5 SD from the mean sale price; total area is < 100 sq. ft.; inflation-adjusted prices).

I’m using log_price = log(adjusted_sale_price) as the outcome variable and including internal features, amenities, and fixed effects in addition to distance to highway.

# takes about 7 min to run
# cleaned_properties <- philly_properties %>%
#   mutate(price_perTLA = sale_price / total_livable_area,
#          price_perTA = sale_price / total_area,
#          log_price = log(sale_price),
#          log_price_perTLA = log(price_perTLA),
#          log_price_perTA = log(price_perTA),
#          category_easy = case_when(category_code_description %in% c("APARTMENTS  > 4 UNITS",
#                                                                     "MIXED USE",
#                                                                     "MULTI FAMILY",
#                                                                     "SINGLE FAMILY") ~ "Residential or Mixed",
#                                    grepl("INDUS", category_code_description) ~ "Industrial",
#                                    grepl("HOTEL", category_code_description) ~ "Hotel",
#                                    grepl("OFFICES", category_code_description) ~ "Offices",
#                                    category_code_description %in% c("COMMERCIAL",
#                                                                     "GARAGE - COMMERCIAL",
#                                                                     "RETAIL") ~ "Commercial",
#                                    grepl("VACANT", category_code_description) ~ "Vacant",
#                                    .default = "Other"),
#          dist_to_US001 = calculate_nearest_distance(geometry,
#                                      philly_mainhighways %>%
#                                        filter(ST_RT_NO == "0001")),
#          dist_to_I076 = calculate_nearest_distance(geometry,
#                                     philly_mainhighways %>%
#                                       filter(ST_RT_NO == "0076")),
#          dist_to_I095 = calculate_nearest_distance(geometry,
#                                     philly_mainhighways %>%
#                                       filter(ST_RT_NO == "0095")),
#          dist_to_I676 = calculate_nearest_distance(geometry,
#                                     philly_mainhighways %>%
#                                       filter(ST_RT_NO == "0676")),
#          dist_park = calculate_nearest_distance(geometry,
#                                                 philly_parks),
#          dist_lib = calculate_nearest_distance(geometry,
#                                                libraries),
#          dist_transit = calculate_nearest_distance(geometry,
#                                                    transit),
#          dist_school = calculate_nearest_distance(geometry,
#                                                   school),
#          dist_cityhall = calculate_nearest_distance(geometry,
#                                                     city_hall),
#          dist_hospital = calculate_nearest_distance(geometry,
#                                                     hospitals),
#          dist_museum = calculate_nearest_distance(geometry,
#                                                   museums),
#          dist_prison = calculate_nearest_distance(geometry,
#                                                   prisons),
#          dist_historic = calculate_nearest_distance(geometry,
#                                                     historic),
#          dist_to_US001m = dist_to_US001 * 0.3048,
#          dist_to_I076m = dist_to_I076 * 0.3048,
#          dist_to_I095m = dist_to_I095 * 0.3048,
#          dist_to_I676m = dist_to_I676 * 0.3048,
#          dist_parkm = dist_park * 0.3048,
#          dist_libm = dist_lib * 0.3048,
#          dist_transitm = dist_transit * 0.3048,
#          dist_schoolm = dist_school * 0.3048,
#          dist_cityhallm = dist_cityhall * 0.3048,
#          dist_hospitalm = dist_hospital * 0.3048,
#          dist_museumm = dist_museum * 0.3048,
#          dist_prisonm = dist_prison * 0.3048,
#          dist_historicm = dist_historic * 0.3048)

# find closest highway
# clean_closest <- cleaned_properties %>%
#   select(matches("objectid|dist_to_US|dist_to_I")) %>%
#   pivot_longer(cols = 2:5,
#                names_to = "highway",
#                values_to = "distance") %>%
#   group_by(objectid) %>%
#   summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
#   ungroup()  %>%
#   mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048)

# join closest highway
# cleaned_props <- left_join(cleaned_properties,
#                            clean_closest %>% st_drop_geometry(),
#                            by = "objectid") %>%
#   mutate(closest_highway = case_when(
#     dist_to_closest_highway == dist_to_US001 ~ "US-1",
#     dist_to_closest_highway == dist_to_I076 ~ "I-76",
#     dist_to_closest_highway == dist_to_I095 ~ "I-95",
#     dist_to_closest_highway == dist_to_I676 ~ "I-676",
#     .default = NA),
#     closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
#     highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
#                              closest_highway %in% c("I-95","I-76") ~ "along waterway",
#                              .default = NA)) %>%
#   filter(!is.na(closest_highway))

# save for easier access later
# st_write(cleaned_props,
#          "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities_250318.geojson")

# read in pre-saved data
cleaned_props <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson") %>% 
  mutate(year_built = as.numeric(year_built))
## Reading layer `philly_cleaned_properties_with_amenities' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 244003 features and 117 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2662223 ymin: 210016.8 xmax: 2749428 ymax: 304861
## Projected CRS: NAD83 / Pennsylvania South (ftUS)

Looking at correlation matrix of internal characteristics.

# model with no interactions
mult_reg <- lm(log_price ~ dist_to_closest_highwaym + 
                 total_area + year_built + # internal characteristics
                 dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm + # external / amenities
                 factor(zip_code), # neighborhood effects (via zip code for now)
               data = cleaned_props)

summary(mult_reg)
## 
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym + total_area + 
##     year_built + dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + 
##     dist_historicm + factor(zip_code), data = cleaned_props)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7283 -0.4026  0.0158  0.4234  5.9466 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               1.298e+01  9.082e-02 142.874  < 2e-16 ***
## dist_to_closest_highwaym -3.267e-05  2.673e-06 -12.222  < 2e-16 ***
## total_area                1.406e-07  1.166e-08  12.059  < 2e-16 ***
## year_built                8.137e-04  3.015e-05  26.994  < 2e-16 ***
## dist_parkm                1.874e-04  1.013e-05  18.498  < 2e-16 ***
## dist_libm                 2.197e-05  4.309e-06   5.098 3.44e-07 ***
## dist_cityhallm            3.409e-05  2.240e-06  15.222  < 2e-16 ***
## dist_hospitalm           -1.537e-04  3.399e-06 -45.216  < 2e-16 ***
## dist_historicm            4.187e-05  2.987e-06  14.016  < 2e-16 ***
## factor(zip_code)19103    -5.293e-01  7.495e-02  -7.062 1.64e-12 ***
## factor(zip_code)19104    -2.508e+00  7.245e-02 -34.612  < 2e-16 ***
## factor(zip_code)19106    -9.430e-01  7.575e-02 -12.450  < 2e-16 ***
## factor(zip_code)19107    -7.211e-01  7.760e-02  -9.292  < 2e-16 ***
## factor(zip_code)19109     1.210e+00  4.842e-01   2.499   0.0124 *  
## factor(zip_code)19111    -2.781e+00  7.655e-02 -36.327  < 2e-16 ***
## factor(zip_code)19112    -1.519e-02  2.415e-01  -0.063   0.9498    
## factor(zip_code)19114    -2.656e+00  8.207e-02 -32.362  < 2e-16 ***
## factor(zip_code)19115    -2.283e+00  8.187e-02 -27.881  < 2e-16 ***
## factor(zip_code)19116    -2.431e+00  8.571e-02 -28.360  < 2e-16 ***
## factor(zip_code)19118    -1.376e+00  7.955e-02 -17.299  < 2e-16 ***
## factor(zip_code)19119    -2.011e+00  7.554e-02 -26.630  < 2e-16 ***
## factor(zip_code)19120    -3.360e+00  7.429e-02 -45.227  < 2e-16 ***
## factor(zip_code)19121    -2.424e+00  7.210e-02 -33.618  < 2e-16 ***
## factor(zip_code)19122    -2.315e+00  7.288e-02 -31.763  < 2e-16 ***
## factor(zip_code)19123    -1.583e+00  7.302e-02 -21.682  < 2e-16 ***
## factor(zip_code)19124    -3.329e+00  7.446e-02 -44.709  < 2e-16 ***
## factor(zip_code)19125    -2.248e+00  7.261e-02 -30.954  < 2e-16 ***
## factor(zip_code)19126    -2.739e+00  7.685e-02 -35.643  < 2e-16 ***
## factor(zip_code)19127    -2.059e+00  7.620e-02 -27.017  < 2e-16 ***
## factor(zip_code)19128    -2.220e+00  7.456e-02 -29.783  < 2e-16 ***
## factor(zip_code)19129    -2.342e+00  7.499e-02 -31.230  < 2e-16 ***
## factor(zip_code)19130    -1.373e+00  7.236e-02 -18.973  < 2e-16 ***
## factor(zip_code)19131    -2.987e+00  7.295e-02 -40.947  < 2e-16 ***
## factor(zip_code)19132    -3.667e+00  7.239e-02 -50.647  < 2e-16 ***
## factor(zip_code)19133    -3.645e+00  7.331e-02 -49.723  < 2e-16 ***
## factor(zip_code)19134    -3.428e+00  7.284e-02 -47.065  < 2e-16 ***
## factor(zip_code)19135    -2.807e+00  7.626e-02 -36.809  < 2e-16 ***
## factor(zip_code)19136    -2.781e+00  7.831e-02 -35.516  < 2e-16 ***
## factor(zip_code)19137    -2.765e+00  7.719e-02 -35.814  < 2e-16 ***
## factor(zip_code)19138    -3.059e+00  7.533e-02 -40.616  < 2e-16 ***
## factor(zip_code)19139    -3.058e+00  7.268e-02 -42.070  < 2e-16 ***
## factor(zip_code)19140    -3.754e+00  7.283e-02 -51.551  < 2e-16 ***
## factor(zip_code)19141    -3.218e+00  7.465e-02 -43.113  < 2e-16 ***
## factor(zip_code)19142    -3.112e+00  7.313e-02 -42.563  < 2e-16 ***
## factor(zip_code)19143    -2.897e+00  7.231e-02 -40.060  < 2e-16 ***
## factor(zip_code)19144    -2.792e+00  7.400e-02 -37.736  < 2e-16 ***
## factor(zip_code)19145    -2.512e+00  7.168e-02 -35.044  < 2e-16 ***
## factor(zip_code)19146    -2.039e+00  7.149e-02 -28.526  < 2e-16 ***
## factor(zip_code)19147    -1.607e+00  7.167e-02 -22.428  < 2e-16 ***
## factor(zip_code)19148    -2.420e+00  7.170e-02 -33.748  < 2e-16 ***
## factor(zip_code)19149    -2.888e+00  7.612e-02 -37.941  < 2e-16 ***
## factor(zip_code)19150    -2.483e+00  7.722e-02 -32.155  < 2e-16 ***
## factor(zip_code)19151    -2.843e+00  7.363e-02 -38.609  < 2e-16 ***
## factor(zip_code)19152    -2.676e+00  7.907e-02 -33.840  < 2e-16 ***
## factor(zip_code)19153    -2.426e+00  7.548e-02 -32.140  < 2e-16 ***
## factor(zip_code)19154    -2.879e+00  8.564e-02 -33.623  < 2e-16 ***
## factor(zip_code)19192    -1.055e+00  8.326e-01  -1.267   0.2053    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8296 on 243946 degrees of freedom
## Multiple R-squared:  0.337,  Adjusted R-squared:  0.3369 
## F-statistic:  2215 on 56 and 243946 DF,  p-value: < 2.2e-16
# model with interaction between dist to closest highway and ZIP
mult_reg_int <- lm(log_price ~ dist_to_closest_highwaym * factor(zip_code) + 
                     total_area + year_built + # internal characteristics
                     dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm, # external / amenities
                   data = cleaned_props)

summary(mult_reg_int)
## 
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym * factor(zip_code) + 
##     total_area + year_built + dist_parkm + dist_libm + dist_cityhallm + 
##     dist_hospitalm + dist_historicm, data = cleaned_props)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5339 -0.3954  0.0125  0.4030  5.6840 
## 
## Coefficients: (2 not defined because of singularities)
##                                                  Estimate Std. Error t value
## (Intercept)                                     1.396e+01  2.073e-01  67.366
## dist_to_closest_highwaym                       -7.427e-04  1.806e-04  -4.112
## factor(zip_code)19103                          -1.439e+00  2.051e-01  -7.017
## factor(zip_code)19104                          -3.731e+00  2.017e-01 -18.493
## factor(zip_code)19106                          -1.918e+00  2.037e-01  -9.417
## factor(zip_code)19107                          -1.579e+00  2.105e-01  -7.500
## factor(zip_code)19109                           1.131e+00  4.721e-01   2.397
## factor(zip_code)19111                          -2.965e+00  2.028e-01 -14.616
## factor(zip_code)19112                          -1.037e+00  6.905e-01  -1.502
## factor(zip_code)19114                          -2.074e+00  2.073e-01 -10.005
## factor(zip_code)19115                          -1.930e+00  2.066e-01  -9.341
## factor(zip_code)19116                          -1.550e+00  2.100e-01  -7.382
## factor(zip_code)19118                          -1.121e+00  2.985e-01  -3.757
## factor(zip_code)19119                          -2.759e+00  2.178e-01 -12.671
## factor(zip_code)19120                          -3.602e+00  2.010e-01 -17.924
## factor(zip_code)19121                          -3.084e+00  2.021e-01 -15.254
## factor(zip_code)19122                          -1.982e+00  2.033e-01  -9.751
## factor(zip_code)19123                          -1.804e+00  2.018e-01  -8.938
## factor(zip_code)19124                          -3.093e+00  2.019e-01 -15.316
## factor(zip_code)19125                          -2.137e+00  2.004e-01 -10.663
## factor(zip_code)19126                          -2.582e+00  2.286e-01 -11.292
## factor(zip_code)19127                          -2.727e+00  2.140e-01 -12.742
## factor(zip_code)19128                          -2.770e+00  2.011e-01 -13.769
## factor(zip_code)19129                          -2.547e+00  2.038e-01 -12.495
## factor(zip_code)19130                          -1.631e+00  2.053e-01  -7.942
## factor(zip_code)19131                          -2.574e+00  2.019e-01 -12.750
## factor(zip_code)19132                          -4.282e+00  2.027e-01 -21.124
## factor(zip_code)19133                          -2.316e+00  2.266e-01 -10.218
## factor(zip_code)19134                          -2.285e+00  2.009e-01 -11.373
## factor(zip_code)19135                          -2.501e+00  2.033e-01 -12.301
## factor(zip_code)19136                          -2.512e+00  2.051e-01 -12.249
## factor(zip_code)19137                          -2.760e+00  2.048e-01 -13.473
## factor(zip_code)19138                          -5.220e+00  2.076e-01 -25.140
## factor(zip_code)19139                          -3.893e+00  2.088e-01 -18.646
## factor(zip_code)19140                          -3.976e+00  2.009e-01 -19.792
## factor(zip_code)19141                          -3.880e+00  2.024e-01 -19.175
## factor(zip_code)19142                          -2.863e+00  2.158e-01 -13.266
## factor(zip_code)19143                          -3.529e+00  2.015e-01 -17.519
## factor(zip_code)19144                          -3.415e+00  2.013e-01 -16.959
## factor(zip_code)19145                          -3.428e+00  2.008e-01 -17.072
## factor(zip_code)19146                          -2.837e+00  2.003e-01 -14.167
## factor(zip_code)19147                          -2.014e+00  2.001e-01 -10.064
## factor(zip_code)19148                          -2.775e+00  2.002e-01 -13.862
## factor(zip_code)19149                          -2.781e+00  2.023e-01 -13.748
## factor(zip_code)19150                          -1.916e+00  2.489e-01  -7.697
## factor(zip_code)19151                          -2.708e+00  2.023e-01 -13.382
## factor(zip_code)19152                          -2.654e+00  2.044e-01 -12.982
## factor(zip_code)19153                          -4.058e-01  2.194e-01  -1.850
## factor(zip_code)19154                          -1.787e+00  2.127e-01  -8.403
## factor(zip_code)19192                          -1.267e+00  8.126e-01  -1.559
## total_area                                      1.384e-07  1.136e-08  12.190
## year_built                                      7.406e-04  2.945e-05  25.143
## dist_parkm                                      1.458e-04  1.014e-05  14.379
## dist_libm                                       4.331e-05  4.557e-06   9.505
## dist_cityhallm                                 -1.855e-05  2.818e-06  -6.582
## dist_hospitalm                                 -2.067e-04  4.052e-06 -50.997
## dist_historicm                                 -6.166e-05  3.713e-06 -16.609
## dist_to_closest_highwaym:factor(zip_code)19103  1.067e-03  1.952e-04   5.465
## dist_to_closest_highwaym:factor(zip_code)19104  1.401e-03  1.827e-04   7.668
## dist_to_closest_highwaym:factor(zip_code)19106  1.657e-03  2.076e-04   7.981
## dist_to_closest_highwaym:factor(zip_code)19107  8.703e-04  1.922e-04   4.528
## dist_to_closest_highwaym:factor(zip_code)19109         NA         NA      NA
## dist_to_closest_highwaym:factor(zip_code)19111  8.598e-04  1.809e-04   4.753
## dist_to_closest_highwaym:factor(zip_code)19112  1.755e-03  7.761e-04   2.261
## dist_to_closest_highwaym:factor(zip_code)19114  5.741e-04  1.817e-04   3.159
## dist_to_closest_highwaym:factor(zip_code)19115  7.756e-04  1.810e-04   4.284
## dist_to_closest_highwaym:factor(zip_code)19116  7.958e-04  1.811e-04   4.394
## dist_to_closest_highwaym:factor(zip_code)19118  6.837e-04  1.836e-04   3.725
## dist_to_closest_highwaym:factor(zip_code)19119  8.939e-04  1.817e-04   4.921
## dist_to_closest_highwaym:factor(zip_code)19120  9.434e-04  1.810e-04   5.212
## dist_to_closest_highwaym:factor(zip_code)19121  8.344e-04  1.815e-04   4.598
## dist_to_closest_highwaym:factor(zip_code)19122  1.657e-04  1.828e-04   0.906
## dist_to_closest_highwaym:factor(zip_code)19123  1.506e-04  1.893e-04   0.795
## dist_to_closest_highwaym:factor(zip_code)19124  6.653e-04  1.811e-04   3.674
## dist_to_closest_highwaym:factor(zip_code)19125  3.181e-04  1.818e-04   1.750
## dist_to_closest_highwaym:factor(zip_code)19126  7.213e-04  1.831e-04   3.939
## dist_to_closest_highwaym:factor(zip_code)19127  9.527e-04  1.832e-04   5.199
## dist_to_closest_highwaym:factor(zip_code)19128  8.400e-04  1.807e-04   4.648
## dist_to_closest_highwaym:factor(zip_code)19129  4.032e-04  1.929e-04   2.091
## dist_to_closest_highwaym:factor(zip_code)19130  3.469e-04  1.877e-04   1.848
## dist_to_closest_highwaym:factor(zip_code)19131  2.834e-04  1.814e-04   1.562
## dist_to_closest_highwaym:factor(zip_code)19132  8.471e-04  1.814e-04   4.670
## dist_to_closest_highwaym:factor(zip_code)19133  6.323e-05  1.856e-04   0.341
## dist_to_closest_highwaym:factor(zip_code)19134 -1.265e-04  1.809e-04  -0.699
## dist_to_closest_highwaym:factor(zip_code)19135  6.642e-04  1.827e-04   3.635
## dist_to_closest_highwaym:factor(zip_code)19136  6.627e-04  1.824e-04   3.632
## dist_to_closest_highwaym:factor(zip_code)19137  1.603e-03  1.951e-04   8.219
## dist_to_closest_highwaym:factor(zip_code)19138  1.305e-03  1.812e-04   7.203
## dist_to_closest_highwaym:factor(zip_code)19139  9.590e-04  1.820e-04   5.269
## dist_to_closest_highwaym:factor(zip_code)19140  6.053e-04  1.813e-04   3.339
## dist_to_closest_highwaym:factor(zip_code)19141  9.893e-04  1.813e-04   5.458
## dist_to_closest_highwaym:factor(zip_code)19142  5.484e-04  1.829e-04   2.998
## dist_to_closest_highwaym:factor(zip_code)19143  8.714e-04  1.810e-04   4.815
## dist_to_closest_highwaym:factor(zip_code)19144  9.529e-04  1.810e-04   5.266
## dist_to_closest_highwaym:factor(zip_code)19145  1.053e-03  1.817e-04   5.797
## dist_to_closest_highwaym:factor(zip_code)19146  9.243e-04  1.814e-04   5.096
## dist_to_closest_highwaym:factor(zip_code)19147  5.426e-04  1.815e-04   2.989
## dist_to_closest_highwaym:factor(zip_code)19148  6.664e-04  1.813e-04   3.676
## dist_to_closest_highwaym:factor(zip_code)19149  8.500e-04  1.823e-04   4.662
## dist_to_closest_highwaym:factor(zip_code)19150  6.773e-04  1.823e-04   3.715
## dist_to_closest_highwaym:factor(zip_code)19151  4.405e-04  1.817e-04   2.424
## dist_to_closest_highwaym:factor(zip_code)19152  8.300e-04  1.826e-04   4.546
## dist_to_closest_highwaym:factor(zip_code)19153 -4.377e-04  1.864e-04  -2.348
## dist_to_closest_highwaym:factor(zip_code)19154  5.907e-04  1.816e-04   3.253
## dist_to_closest_highwaym:factor(zip_code)19192         NA         NA      NA
##                                                Pr(>|t|)    
## (Intercept)                                     < 2e-16 ***
## dist_to_closest_highwaym                       3.92e-05 ***
## factor(zip_code)19103                          2.28e-12 ***
## factor(zip_code)19104                           < 2e-16 ***
## factor(zip_code)19106                           < 2e-16 ***
## factor(zip_code)19107                          6.41e-14 ***
## factor(zip_code)19109                          0.016547 *  
## factor(zip_code)19111                           < 2e-16 ***
## factor(zip_code)19112                          0.133123    
## factor(zip_code)19114                           < 2e-16 ***
## factor(zip_code)19115                           < 2e-16 ***
## factor(zip_code)19116                          1.57e-13 ***
## factor(zip_code)19118                          0.000172 ***
## factor(zip_code)19119                           < 2e-16 ***
## factor(zip_code)19120                           < 2e-16 ***
## factor(zip_code)19121                           < 2e-16 ***
## factor(zip_code)19122                           < 2e-16 ***
## factor(zip_code)19123                           < 2e-16 ***
## factor(zip_code)19124                           < 2e-16 ***
## factor(zip_code)19125                           < 2e-16 ***
## factor(zip_code)19126                           < 2e-16 ***
## factor(zip_code)19127                           < 2e-16 ***
## factor(zip_code)19128                           < 2e-16 ***
## factor(zip_code)19129                           < 2e-16 ***
## factor(zip_code)19130                          1.99e-15 ***
## factor(zip_code)19131                           < 2e-16 ***
## factor(zip_code)19132                           < 2e-16 ***
## factor(zip_code)19133                           < 2e-16 ***
## factor(zip_code)19134                           < 2e-16 ***
## factor(zip_code)19135                           < 2e-16 ***
## factor(zip_code)19136                           < 2e-16 ***
## factor(zip_code)19137                           < 2e-16 ***
## factor(zip_code)19138                           < 2e-16 ***
## factor(zip_code)19139                           < 2e-16 ***
## factor(zip_code)19140                           < 2e-16 ***
## factor(zip_code)19141                           < 2e-16 ***
## factor(zip_code)19142                           < 2e-16 ***
## factor(zip_code)19143                           < 2e-16 ***
## factor(zip_code)19144                           < 2e-16 ***
## factor(zip_code)19145                           < 2e-16 ***
## factor(zip_code)19146                           < 2e-16 ***
## factor(zip_code)19147                           < 2e-16 ***
## factor(zip_code)19148                           < 2e-16 ***
## factor(zip_code)19149                           < 2e-16 ***
## factor(zip_code)19150                          1.39e-14 ***
## factor(zip_code)19151                           < 2e-16 ***
## factor(zip_code)19152                           < 2e-16 ***
## factor(zip_code)19153                          0.064321 .  
## factor(zip_code)19154                           < 2e-16 ***
## factor(zip_code)19192                          0.118919    
## total_area                                      < 2e-16 ***
## year_built                                      < 2e-16 ***
## dist_parkm                                      < 2e-16 ***
## dist_libm                                       < 2e-16 ***
## dist_cityhallm                                 4.65e-11 ***
## dist_hospitalm                                  < 2e-16 ***
## dist_historicm                                  < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19103 4.64e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19104 1.76e-14 ***
## dist_to_closest_highwaym:factor(zip_code)19106 1.46e-15 ***
## dist_to_closest_highwaym:factor(zip_code)19107 5.95e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19109       NA    
## dist_to_closest_highwaym:factor(zip_code)19111 2.01e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19112 0.023749 *  
## dist_to_closest_highwaym:factor(zip_code)19114 0.001585 ** 
## dist_to_closest_highwaym:factor(zip_code)19115 1.83e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19116 1.12e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19118 0.000196 ***
## dist_to_closest_highwaym:factor(zip_code)19119 8.64e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19120 1.87e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19121 4.27e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19122 0.364812    
## dist_to_closest_highwaym:factor(zip_code)19123 0.426412    
## dist_to_closest_highwaym:factor(zip_code)19124 0.000239 ***
## dist_to_closest_highwaym:factor(zip_code)19125 0.080129 .  
## dist_to_closest_highwaym:factor(zip_code)19126 8.19e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19127 2.00e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19128 3.35e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19129 0.036563 *  
## dist_to_closest_highwaym:factor(zip_code)19130 0.064560 .  
## dist_to_closest_highwaym:factor(zip_code)19131 0.118233    
## dist_to_closest_highwaym:factor(zip_code)19132 3.02e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19133 0.733377    
## dist_to_closest_highwaym:factor(zip_code)19134 0.484350    
## dist_to_closest_highwaym:factor(zip_code)19135 0.000278 ***
## dist_to_closest_highwaym:factor(zip_code)19136 0.000281 ***
## dist_to_closest_highwaym:factor(zip_code)19137  < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19138 5.91e-13 ***
## dist_to_closest_highwaym:factor(zip_code)19139 1.37e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19140 0.000841 ***
## dist_to_closest_highwaym:factor(zip_code)19141 4.82e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19142 0.002718 ** 
## dist_to_closest_highwaym:factor(zip_code)19143 1.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19144 1.40e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19145 6.76e-09 ***
## dist_to_closest_highwaym:factor(zip_code)19146 3.47e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19147 0.002801 ** 
## dist_to_closest_highwaym:factor(zip_code)19148 0.000237 ***
## dist_to_closest_highwaym:factor(zip_code)19149 3.13e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19150 0.000204 ***
## dist_to_closest_highwaym:factor(zip_code)19151 0.015334 *  
## dist_to_closest_highwaym:factor(zip_code)19152 5.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19153 0.018892 *  
## dist_to_closest_highwaym:factor(zip_code)19154 0.001142 ** 
## dist_to_closest_highwaym:factor(zip_code)19192       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.808 on 243900 degrees of freedom
## Multiple R-squared:  0.3712, Adjusted R-squared:  0.371 
## F-statistic:  1412 on 102 and 243900 DF,  p-value: < 2.2e-16

Scaling all numeric variables to directly compare effect sizes.

scaled_vars <- cleaned_props %>% 
  select(log_price, dist_to_closest_highwaym, total_area, year_built, dist_parkm, dist_libm, dist_cityhallm, dist_hospitalm, dist_historicm, zip_code) %>% 
  mutate(across(c(dist_to_closest_highwaym, total_area, year_built, dist_parkm, dist_libm, dist_cityhallm, dist_hospitalm, dist_historicm), scale))

# model with scaled vars and no interactions 
scaled_mult_reg <- lm(log_price ~ dist_to_closest_highwaym + 
                 total_area + year_built + # internal characteristics
                 dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm + # external / amenities
                 factor(zip_code), # neighborhood effects (via zip code for now)
               data = scaled_vars)

summary(scaled_mult_reg)
## 
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym + total_area + 
##     year_built + dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + 
##     dist_historicm + factor(zip_code), data = scaled_vars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7283 -0.4026  0.0158  0.4234  5.9466 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              14.667004   0.072910 201.167  < 2e-16 ***
## dist_to_closest_highwaym -0.041383   0.003386 -12.222  < 2e-16 ***
## total_area                0.020262   0.001680  12.059  < 2e-16 ***
## year_built                0.046906   0.001738  26.994  < 2e-16 ***
## dist_parkm                0.036251   0.001960  18.498  < 2e-16 ***
## dist_libm                 0.010648   0.002089   5.098 3.44e-07 ***
## dist_cityhallm            0.177281   0.011647  15.222  < 2e-16 ***
## dist_hospitalm           -0.153718   0.003400 -45.216  < 2e-16 ***
## dist_historicm            0.053551   0.003821  14.016  < 2e-16 ***
## factor(zip_code)19103    -0.529311   0.074951  -7.062 1.64e-12 ***
## factor(zip_code)19104    -2.507668   0.072452 -34.612  < 2e-16 ***
## factor(zip_code)19106    -0.943043   0.075748 -12.450  < 2e-16 ***
## factor(zip_code)19107    -0.721091   0.077604  -9.292  < 2e-16 ***
## factor(zip_code)19109     1.210102   0.484179   2.499   0.0124 *  
## factor(zip_code)19111    -2.780951   0.076553 -36.327  < 2e-16 ***
## factor(zip_code)19112    -0.015193   0.241531  -0.063   0.9498    
## factor(zip_code)19114    -2.656065   0.082073 -32.362  < 2e-16 ***
## factor(zip_code)19115    -2.282731   0.081874 -27.881  < 2e-16 ***
## factor(zip_code)19116    -2.430872   0.085714 -28.360  < 2e-16 ***
## factor(zip_code)19118    -1.376155   0.079551 -17.299  < 2e-16 ***
## factor(zip_code)19119    -2.011487   0.075536 -26.630  < 2e-16 ***
## factor(zip_code)19120    -3.359982   0.074291 -45.227  < 2e-16 ***
## factor(zip_code)19121    -2.423843   0.072100 -33.618  < 2e-16 ***
## factor(zip_code)19122    -2.314892   0.072881 -31.763  < 2e-16 ***
## factor(zip_code)19123    -1.583329   0.073023 -21.682  < 2e-16 ***
## factor(zip_code)19124    -3.329156   0.074463 -44.709  < 2e-16 ***
## factor(zip_code)19125    -2.247696   0.072614 -30.954  < 2e-16 ***
## factor(zip_code)19126    -2.739012   0.076846 -35.643  < 2e-16 ***
## factor(zip_code)19127    -2.058590   0.076196 -27.017  < 2e-16 ***
## factor(zip_code)19128    -2.220476   0.074555 -29.783  < 2e-16 ***
## factor(zip_code)19129    -2.342086   0.074994 -31.230  < 2e-16 ***
## factor(zip_code)19130    -1.372904   0.072362 -18.973  < 2e-16 ***
## factor(zip_code)19131    -2.987252   0.072954 -40.947  < 2e-16 ***
## factor(zip_code)19132    -3.666521   0.072394 -50.647  < 2e-16 ***
## factor(zip_code)19133    -3.644950   0.073305 -49.723  < 2e-16 ***
## factor(zip_code)19134    -3.428068   0.072836 -47.065  < 2e-16 ***
## factor(zip_code)19135    -2.806929   0.076256 -36.809  < 2e-16 ***
## factor(zip_code)19136    -2.781071   0.078306 -35.516  < 2e-16 ***
## factor(zip_code)19137    -2.764584   0.077193 -35.814  < 2e-16 ***
## factor(zip_code)19138    -3.059442   0.075325 -40.616  < 2e-16 ***
## factor(zip_code)19139    -3.057536   0.072677 -42.070  < 2e-16 ***
## factor(zip_code)19140    -3.754406   0.072829 -51.551  < 2e-16 ***
## factor(zip_code)19141    -3.218457   0.074651 -43.113  < 2e-16 ***
## factor(zip_code)19142    -3.112464   0.073126 -42.563  < 2e-16 ***
## factor(zip_code)19143    -2.896724   0.072309 -40.060  < 2e-16 ***
## factor(zip_code)19144    -2.792299   0.073995 -37.736  < 2e-16 ***
## factor(zip_code)19145    -2.512029   0.071683 -35.044  < 2e-16 ***
## factor(zip_code)19146    -2.039442   0.071495 -28.526  < 2e-16 ***
## factor(zip_code)19147    -1.607364   0.071667 -22.428  < 2e-16 ***
## factor(zip_code)19148    -2.419911   0.071705 -33.748  < 2e-16 ***
## factor(zip_code)19149    -2.887972   0.076118 -37.941  < 2e-16 ***
## factor(zip_code)19150    -2.482971   0.077218 -32.155  < 2e-16 ***
## factor(zip_code)19151    -2.842632   0.073626 -38.609  < 2e-16 ***
## factor(zip_code)19152    -2.675783   0.079072 -33.840  < 2e-16 ***
## factor(zip_code)19153    -2.426069   0.075484 -32.140  < 2e-16 ***
## factor(zip_code)19154    -2.879380   0.085636 -33.623  < 2e-16 ***
## factor(zip_code)19192    -1.054654   0.832615  -1.267   0.2053    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8296 on 243946 degrees of freedom
## Multiple R-squared:  0.337,  Adjusted R-squared:  0.3369 
## F-statistic:  2215 on 56 and 243946 DF,  p-value: < 2.2e-16
# model with scaled vars and interaction between dist to closest highway and ZIP
scaled_mult_reg_int <- lm(log_price ~ dist_to_closest_highwaym * factor(zip_code) + 
                     total_area + year_built + # internal characteristics
                     dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm, # external / amenities
                   data = scaled_vars)

summary(scaled_mult_reg_int)
## 
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym * factor(zip_code) + 
##     total_area + year_built + dist_parkm + dist_libm + dist_cityhallm + 
##     dist_hospitalm + dist_historicm, data = scaled_vars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5339 -0.3954  0.0125  0.4030  5.6840 
## 
## Coefficients: (2 not defined because of singularities)
##                                                 Estimate Std. Error t value
## (Intercept)                                    13.572930   0.133979 101.307
## dist_to_closest_highwaym                       -0.940615   0.228746  -4.112
## factor(zip_code)19103                           0.330072   0.156196   2.113
## factor(zip_code)19104                          -1.408350   0.134142 -10.499
## factor(zip_code)19106                           0.829401   0.191028   4.342
## factor(zip_code)19107                          -0.135495   0.143970  -0.941
## factor(zip_code)19109                           1.131406   0.472079   2.397
## factor(zip_code)19111                          -1.538953   0.136868 -11.244
## factor(zip_code)19112                           1.873142   0.682042   2.746
## factor(zip_code)19114                          -1.122219   0.141755  -7.917
## factor(zip_code)19115                          -0.643690   0.141431  -4.551
## factor(zip_code)19116                          -0.230560   0.145248  -1.587
## factor(zip_code)19118                           0.012282   0.214759   0.057
## factor(zip_code)19119                          -1.276944   0.144665  -8.827
## factor(zip_code)19120                          -2.037583   0.135467 -15.041
## factor(zip_code)19121                          -1.699964   0.132979 -12.784
## factor(zip_code)19122                          -1.707426   0.133782 -12.763
## factor(zip_code)19123                          -1.554385   0.148651 -10.457
## factor(zip_code)19124                          -1.989688   0.134910 -14.748
## factor(zip_code)19125                          -1.609034   0.134899 -11.928
## factor(zip_code)19126                          -1.385400   0.147894  -9.368
## factor(zip_code)19127                          -1.147277   0.137465  -8.346
## factor(zip_code)19128                          -1.376683   0.134861 -10.208
## factor(zip_code)19129                          -1.878192   0.155389 -12.087
## factor(zip_code)19130                          -1.055539   0.137972  -7.650
## factor(zip_code)19131                          -2.103911   0.133673 -15.739
## factor(zip_code)19132                          -2.877606   0.133360 -21.578
## factor(zip_code)19133                          -2.210926   0.138348 -15.981
## factor(zip_code)19134                          -2.494904   0.133763 -18.652
## factor(zip_code)19135                          -1.399868   0.138517 -10.106
## factor(zip_code)19136                          -1.413564   0.139262 -10.150
## factor(zip_code)19137                          -0.101344   0.163125  -0.621
## factor(zip_code)19138                          -3.055422   0.138189 -22.110
## factor(zip_code)19139                          -2.302824   0.135429 -17.004
## factor(zip_code)19140                          -2.972104   0.133784 -22.216
## factor(zip_code)19141                          -2.239746   0.135042 -16.586
## factor(zip_code)19142                          -1.953265   0.137641 -14.191
## factor(zip_code)19143                          -2.084560   0.133395 -15.627
## factor(zip_code)19144                          -1.834627   0.134525 -13.638
## factor(zip_code)19145                          -1.681240   0.133048 -12.636
## factor(zip_code)19146                          -1.304594   0.132816  -9.823
## factor(zip_code)19147                          -1.113810   0.133341  -8.353
## factor(zip_code)19148                          -1.669698   0.133081 -12.546
## factor(zip_code)19149                          -1.371280   0.139817  -9.808
## factor(zip_code)19150                          -0.792540   0.171537  -4.620
## factor(zip_code)19151                          -1.977272   0.134252 -14.728
## factor(zip_code)19152                          -1.277553   0.141465  -9.031
## factor(zip_code)19153                          -1.131578   0.135705  -8.338
## factor(zip_code)19154                          -0.807611   0.145804  -5.539
## factor(zip_code)19192                          -1.267090   0.812586  -1.559
## total_area                                      0.019950   0.001637  12.190
## year_built                                      0.042689   0.001698  25.143
## dist_parkm                                      0.028203   0.001961  14.379
## dist_libm                                       0.020993   0.002209   9.505
## dist_cityhallm                                 -0.096456   0.014654  -6.582
## dist_hospitalm                                 -0.206689   0.004053 -50.997
## dist_historicm                                 -0.078868   0.004749 -16.609
## dist_to_closest_highwaym:factor(zip_code)19103  1.351306   0.247277   5.465
## dist_to_closest_highwaym:factor(zip_code)19104  1.773832   0.231336   7.668
## dist_to_closest_highwaym:factor(zip_code)19106  2.098450   0.262941   7.981
## dist_to_closest_highwaym:factor(zip_code)19107  1.102255   0.243422   4.528
## dist_to_closest_highwaym:factor(zip_code)19109        NA         NA      NA
## dist_to_closest_highwaym:factor(zip_code)19111  1.088888   0.229114   4.753
## dist_to_closest_highwaym:factor(zip_code)19112  2.222729   0.982994   2.261
## dist_to_closest_highwaym:factor(zip_code)19114  0.727050   0.230173   3.159
## dist_to_closest_highwaym:factor(zip_code)19115  0.982316   0.229281   4.284
## dist_to_closest_highwaym:factor(zip_code)19116  1.007892   0.229397   4.394
## dist_to_closest_highwaym:factor(zip_code)19118  0.865879   0.232468   3.725
## dist_to_closest_highwaym:factor(zip_code)19119  1.132126   0.230082   4.921
## dist_to_closest_highwaym:factor(zip_code)19120  1.194875   0.229270   5.212
## dist_to_closest_highwaym:factor(zip_code)19121  1.056813   0.229853   4.598
## dist_to_closest_highwaym:factor(zip_code)19122  0.209839   0.231550   0.906
## dist_to_closest_highwaym:factor(zip_code)19123  0.190724   0.239799   0.795
## dist_to_closest_highwaym:factor(zip_code)19124  0.842642   0.229341   3.674
## dist_to_closest_highwaym:factor(zip_code)19125  0.402901   0.230236   1.750
## dist_to_closest_highwaym:factor(zip_code)19126  0.913589   0.231940   3.939
## dist_to_closest_highwaym:factor(zip_code)19127  1.206601   0.232072   5.199
## dist_to_closest_highwaym:factor(zip_code)19128  1.063832   0.228874   4.648
## dist_to_closest_highwaym:factor(zip_code)19129  0.510664   0.244264   2.091
## dist_to_closest_highwaym:factor(zip_code)19130  0.439416   0.237740   1.848
## dist_to_closest_highwaym:factor(zip_code)19131  0.358948   0.229765   1.562
## dist_to_closest_highwaym:factor(zip_code)19132  1.072811   0.229741   4.670
## dist_to_closest_highwaym:factor(zip_code)19133  0.080086   0.235106   0.341
## dist_to_closest_highwaym:factor(zip_code)19134 -0.160232   0.229124  -0.699
## dist_to_closest_highwaym:factor(zip_code)19135  0.841173   0.231404   3.635
## dist_to_closest_highwaym:factor(zip_code)19136  0.839274   0.231057   3.632
## dist_to_closest_highwaym:factor(zip_code)19137  2.030485   0.247053   8.219
## dist_to_closest_highwaym:factor(zip_code)19138  1.652908   0.229476   7.203
## dist_to_closest_highwaym:factor(zip_code)19139  1.214585   0.230514   5.269
## dist_to_closest_highwaym:factor(zip_code)19140  0.766626   0.229588   3.339
## dist_to_closest_highwaym:factor(zip_code)19141  1.253000   0.229576   5.458
## dist_to_closest_highwaym:factor(zip_code)19142  0.694584   0.231687   2.998
## dist_to_closest_highwaym:factor(zip_code)19143  1.103604   0.229197   4.815
## dist_to_closest_highwaym:factor(zip_code)19144  1.206826   0.229182   5.266
## dist_to_closest_highwaym:factor(zip_code)19145  1.333953   0.230112   5.797
## dist_to_closest_highwaym:factor(zip_code)19146  1.170615   0.229708   5.096
## dist_to_closest_highwaym:factor(zip_code)19147  0.687202   0.229927   2.989
## dist_to_closest_highwaym:factor(zip_code)19148  0.844057   0.229632   3.676
## dist_to_closest_highwaym:factor(zip_code)19149  1.076480   0.230911   4.662
## dist_to_closest_highwaym:factor(zip_code)19150  0.857834   0.230941   3.715
## dist_to_closest_highwaym:factor(zip_code)19151  0.557854   0.230098   2.424
## dist_to_closest_highwaym:factor(zip_code)19152  1.051156   0.231227   4.546
## dist_to_closest_highwaym:factor(zip_code)19153 -0.554331   0.236120  -2.348
## dist_to_closest_highwaym:factor(zip_code)19154  0.748083   0.229961   3.253
## dist_to_closest_highwaym:factor(zip_code)19192        NA         NA      NA
##                                                Pr(>|t|)    
## (Intercept)                                     < 2e-16 ***
## dist_to_closest_highwaym                       3.92e-05 ***
## factor(zip_code)19103                          0.034585 *  
## factor(zip_code)19104                           < 2e-16 ***
## factor(zip_code)19106                          1.41e-05 ***
## factor(zip_code)19107                          0.346637    
## factor(zip_code)19109                          0.016547 *  
## factor(zip_code)19111                           < 2e-16 ***
## factor(zip_code)19112                          0.006026 ** 
## factor(zip_code)19114                          2.45e-15 ***
## factor(zip_code)19115                          5.33e-06 ***
## factor(zip_code)19116                          0.112433    
## factor(zip_code)19118                          0.954396    
## factor(zip_code)19119                           < 2e-16 ***
## factor(zip_code)19120                           < 2e-16 ***
## factor(zip_code)19121                           < 2e-16 ***
## factor(zip_code)19122                           < 2e-16 ***
## factor(zip_code)19123                           < 2e-16 ***
## factor(zip_code)19124                           < 2e-16 ***
## factor(zip_code)19125                           < 2e-16 ***
## factor(zip_code)19126                           < 2e-16 ***
## factor(zip_code)19127                           < 2e-16 ***
## factor(zip_code)19128                           < 2e-16 ***
## factor(zip_code)19129                           < 2e-16 ***
## factor(zip_code)19130                          2.01e-14 ***
## factor(zip_code)19131                           < 2e-16 ***
## factor(zip_code)19132                           < 2e-16 ***
## factor(zip_code)19133                           < 2e-16 ***
## factor(zip_code)19134                           < 2e-16 ***
## factor(zip_code)19135                           < 2e-16 ***
## factor(zip_code)19136                           < 2e-16 ***
## factor(zip_code)19137                          0.534423    
## factor(zip_code)19138                           < 2e-16 ***
## factor(zip_code)19139                           < 2e-16 ***
## factor(zip_code)19140                           < 2e-16 ***
## factor(zip_code)19141                           < 2e-16 ***
## factor(zip_code)19142                           < 2e-16 ***
## factor(zip_code)19143                           < 2e-16 ***
## factor(zip_code)19144                           < 2e-16 ***
## factor(zip_code)19145                           < 2e-16 ***
## factor(zip_code)19146                           < 2e-16 ***
## factor(zip_code)19147                           < 2e-16 ***
## factor(zip_code)19148                           < 2e-16 ***
## factor(zip_code)19149                           < 2e-16 ***
## factor(zip_code)19150                          3.84e-06 ***
## factor(zip_code)19151                           < 2e-16 ***
## factor(zip_code)19152                           < 2e-16 ***
## factor(zip_code)19153                           < 2e-16 ***
## factor(zip_code)19154                          3.04e-08 ***
## factor(zip_code)19192                          0.118919    
## total_area                                      < 2e-16 ***
## year_built                                      < 2e-16 ***
## dist_parkm                                      < 2e-16 ***
## dist_libm                                       < 2e-16 ***
## dist_cityhallm                                 4.65e-11 ***
## dist_hospitalm                                  < 2e-16 ***
## dist_historicm                                  < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19103 4.64e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19104 1.76e-14 ***
## dist_to_closest_highwaym:factor(zip_code)19106 1.46e-15 ***
## dist_to_closest_highwaym:factor(zip_code)19107 5.95e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19109       NA    
## dist_to_closest_highwaym:factor(zip_code)19111 2.01e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19112 0.023749 *  
## dist_to_closest_highwaym:factor(zip_code)19114 0.001585 ** 
## dist_to_closest_highwaym:factor(zip_code)19115 1.83e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19116 1.12e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19118 0.000196 ***
## dist_to_closest_highwaym:factor(zip_code)19119 8.64e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19120 1.87e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19121 4.27e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19122 0.364812    
## dist_to_closest_highwaym:factor(zip_code)19123 0.426412    
## dist_to_closest_highwaym:factor(zip_code)19124 0.000239 ***
## dist_to_closest_highwaym:factor(zip_code)19125 0.080129 .  
## dist_to_closest_highwaym:factor(zip_code)19126 8.19e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19127 2.00e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19128 3.35e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19129 0.036563 *  
## dist_to_closest_highwaym:factor(zip_code)19130 0.064560 .  
## dist_to_closest_highwaym:factor(zip_code)19131 0.118233    
## dist_to_closest_highwaym:factor(zip_code)19132 3.02e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19133 0.733377    
## dist_to_closest_highwaym:factor(zip_code)19134 0.484350    
## dist_to_closest_highwaym:factor(zip_code)19135 0.000278 ***
## dist_to_closest_highwaym:factor(zip_code)19136 0.000281 ***
## dist_to_closest_highwaym:factor(zip_code)19137  < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19138 5.91e-13 ***
## dist_to_closest_highwaym:factor(zip_code)19139 1.37e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19140 0.000841 ***
## dist_to_closest_highwaym:factor(zip_code)19141 4.82e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19142 0.002718 ** 
## dist_to_closest_highwaym:factor(zip_code)19143 1.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19144 1.40e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19145 6.76e-09 ***
## dist_to_closest_highwaym:factor(zip_code)19146 3.47e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19147 0.002801 ** 
## dist_to_closest_highwaym:factor(zip_code)19148 0.000237 ***
## dist_to_closest_highwaym:factor(zip_code)19149 3.13e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19150 0.000204 ***
## dist_to_closest_highwaym:factor(zip_code)19151 0.015334 *  
## dist_to_closest_highwaym:factor(zip_code)19152 5.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19153 0.018892 *  
## dist_to_closest_highwaym:factor(zip_code)19154 0.001142 ** 
## dist_to_closest_highwaym:factor(zip_code)19192       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.808 on 243900 degrees of freedom
## Multiple R-squared:  0.3712, Adjusted R-squared:  0.371 
## F-statistic:  1412 on 102 and 243900 DF,  p-value: < 2.2e-16

Relationship between Housing Price and other variables

First, I filtered the property data in the following ways.

  • Keep properties whose sale date is between Jan. 1st, 2020 - Dec.31st, 2024

  • Adjust sales prices for inflation

  • Remove properties whose sale price is < $30,000 and > 5 SD from the mean sale price

  • Remove properties whose total area is < 100 sq. ft.

Then, I calculated each property’s distance to the four highways of interest as well as amenities. The amenities that we’ve looked into so far are parks, libraries, transit stops, schools, City Hall (proxy for the central business district), hospitals, museums, and historic sites. We also included distance to prisons as a disamenity.

We still need to look at sale price’s relationship with restaurants and grocery stores (I just need to figure out how to use the google API to do this).

# plot of log_price_perTA
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
    geom_sf(data = cleaned_props, aes(color = log_price_perTA), size = 0.2) +
  scale_color_gradient(high = "#eb5600",
                       low = "#f7cfb7") +
  labs(title = "Property Price (log sale price per total area)",
       color = "Log Price") +
  theme_void()

I’m going to go through the following columns of interest (amenity predictors) and make a map + scatterplot and get the Pearson correlation coefficient between the amenity/disamenity + sale price (log_price + log_price_perTA).

  • dist_to_closest_highwaym

  • dist_parkm

  • dist_libm

  • dist_transitm

  • dist_schoolm

  • dist_cityhallm

  • dist_hospitalm

  • dist_museumm

  • dist_prisonm

  • dist_historicm

# only keeping two outcomes (log_price + log_price_perTA) and all amenities
vars_of_int <- cleaned_props %>% 
  select(log_price, log_price_perTA, 
         dist_to_closest_highwaym, dist_parkm, dist_libm, dist_transitm, dist_schoolm, dist_cityhallm, dist_hospitalm, dist_museumm, dist_prisonm, dist_historicm)
# cor matrix for corplot
cors_logprice <- cor(vars_of_int %>% 
                       select(-log_price_perTA) %>% 
                       rename(dist_2clssthghwy = dist_to_closest_highwaym) %>% 
                       st_drop_geometry())

cors_logpriceperTA <- cor(vars_of_int %>% 
                            select(-log_price) %>% 
                            rename(dist_2clssthghwy = dist_to_closest_highwaym) %>% 
                            st_drop_geometry())

# significance test 
sigtest_logprice <- cor.mtest(vars_of_int %>% 
                                select(-log_price_perTA) %>% 
                                rename(dist_2clssthghwy = dist_to_closest_highwaym) %>% 
                                st_drop_geometry())

sigtest_logpriceperTA <- cor.mtest(vars_of_int %>% 
                                     select(-log_price) %>% 
                                     rename(dist_2clssthghwy = dist_to_closest_highwaym) %>% 
                                     st_drop_geometry())

# pretty correlation matrix of all continuous variables
corrplot(cors_logprice, 
         title = "Correlation Matrix for Log Price",
         p.mat = sigtest_logprice$p, 
         method = 'circle', 
         type = 'lower', 
         insig='blank',
         addCoef.col ='black', 
         number.cex = 0.8, 
         diag=FALSE)

corrplot(cors_logpriceperTA, 
         title = "Correlation Matrix for Log Price",
         p.mat = sigtest_logpriceperTA$p, 
         method = 'circle', 
         type = 'lower', 
         insig='blank',
         addCoef.col ='black', 
         number.cex = 0.8, 
         diag=FALSE)

From the correlation matrix, we can see that distance to City Hall, distance to Transit, and distance to museums are all highly collinear with correlation coefficients ranging from .78 (transit and museum) to .98 (museum and city hall). Interestingly, distance to city hall and distance to museum are the variables with the correlation coefficient of highest magnitude with log_price_perTA (at -.38 and -.34 respectively), but distance to transit is not as correlated to the outcome variable (-.18).

When considering all properties in Philadelphia (after cleaning this data a bit), we see that distance to closest highway now has a negative relationship with property sale price (-0.18).

I’m moving forward with log_price_perTA as outcome for now because the size of the property explains a lot of the variance in property sale price already and right now we are focused on external predictors.

Distance to Parks

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_parkm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Park"

cor_toplot <- round(cor(dat$dist_parkm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_parkm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 2000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Park (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_parkm)) +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent", alpha = 0.75) +
  labs(title = "Distance to nearest park",
       color = "Dist. to park") +
  theme_void()

Looking at distance to park without the extreme outlier that is almost 2500m from the nearest park

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_parkm) %>% 
  filter(dist_parkm < 2000)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Park"

cor_toplot <- round(cor(dat$dist_parkm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_parkm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 1500,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       subtitle = "Removed outlier",
       x = "Distance to nearest Park (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

Removing the outlier had no effect on the correlation.

Distance to Libraries

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_libm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Library"

cor_toplot <- round(cor(dat$dist_libm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_libm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 3000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Library (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_libm)) +
  geom_sf(data = libraries, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to nearest Library",
       color = "Dist. to Lib.") +
  theme_void()

Distance to Transit

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_transitm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Transit Stop"

cor_toplot <- round(cor(dat$dist_transitm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_transitm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 10000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Transit Stop (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_transitm)) +
  geom_sf(data = transit, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to nearest Transit stop",
       color = "Dist. to station") +
  theme_void()

Distance to Schools

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_schoolm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest School"

cor_toplot <- round(cor(dat$dist_schoolm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_schoolm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 2000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest School (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_schoolm)) +
  geom_sf(data = school, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to nearest School",
       color = "Dist. to School") +
  theme_void()

Distance to City Hall

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_cityhallm)

scat_title <- "Log Price (per total area) as a function of Distance to City Hall"

cor_toplot <- round(cor(dat$dist_cityhallm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_cityhallm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 20000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to City Hall (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_cityhallm)) +
  geom_sf(data = city_hall, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to City Hall",
       color = "Dist. to CH") +
  theme_void()

Distance to Hospitals

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_hospitalm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Hospital"

cor_toplot <- round(cor(dat$dist_hospitalm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_hospitalm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 5000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Hospital (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_hospitalm)) +
  geom_sf(data = hospitals, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to nearest Hospital",
       color = "Dist. to Hospital") +
  theme_void()

Distance to Museums

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_museumm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Museum"

cor_toplot <- round(cor(dat$dist_museumm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_museumm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 15000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Museum (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_museumm)) +
  geom_sf(data = museums, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to nearest Museum",
       color = "Dist. to Museum") +
  theme_void()

Distance to Prisons

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_prisonm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Prison"

cor_toplot <- round(cor(dat$dist_prisonm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_prisonm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 9000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Prison (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_prisonm)) +
  geom_sf(data = prisons, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to nearest Prison",
       color = "Dist. to Prison") +
  theme_void()

Distance to Historic Site

dat <- vars_of_int %>% 
  select(log_price_perTA, dist_historicm)

scat_title <- "Log Price (per total area) as a function of Distance to nearest Historic Site"

cor_toplot <- round(cor(dat$dist_historicm, dat$log_price_perTA), 3)

# scatter
ggplot(dat,
       aes(x = dist_historicm,
           y = log_price_perTA)) +
  geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
  geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
  geom_text(aes(label = cor_toplot,
                x = 6000,
                y = 10,
                color = "#eb5600"),
            show.legend = F) +
  labs(title = scat_title,
       x = "Distance to nearest Historic Site (m)",
       y = "Log Price (per total area)") +
  theme_minimal()

# map
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = dat, size = 0.2,
          aes(color = dist_historicm)) +
  geom_sf(data = historic, color = "#faef73") +
  scale_color_gradient(high = "#f7cfb7",
                       low = "#eb5600") +
  labs(title = "Distance to Historic Site",
       color = "Dist. to Hist.") +
  theme_void()